#####################################
## Clean ESS 2018 data 
#####################################

library(Hmisc)
library(tidyverse)
library(ISCO08ConveRsions)

setwd() # set working directory to the folder with the replication materials

# Load ESS wave 9
dat <- spss.get("ESS9e02.sav")

# Filter western European countries
dat <- dat %>% filter(cntry %in% c("Austria", "Belgium", "Switzerland", "Spain",
                                    "Germany", "Finland", "France", 
                                    "United Kingdom",
                                    "Ireland", "Italy", "Netherlands", "Norway", "Sweden", 
                                    "Hungary","Poland") # <- Two countries for analysis in the appendix 
                      )

# Recode fairness variables
dat <- dat %>% mutate(inc_unfair = recode(netifr, 
                                        "Low, extremely unfair" = 4,
                                        "Low, very unfair" = 3,
                                        "Low, somewhat unfair" = 2,
                                        "Low, slightly unfair" = 1,
                                        "Fair" = 0,	
                                        "High, slightly unfair" = -1,	
                                        "High, somewhat unfair" = -2,
                                        "High, very unfair" = -3,
                                        "High, extremely unfair" = -4),
                      d_inc_unfair = ifelse(inc_unfair > 0, 1, 0),
                      top_inc_unfair = recode(topinfr, 
                                          "Low, extremely unfair" = -4,
                                          "Low, very unfair" = -3,
                                          "Low, somewhat unfair" = -2,
                                          "Low, slightly unfair" = -1,
                                          "Fair" = 0,	
                                          "High, slightly unfair" = 1,
                                          "High, somewhat unfair" = 2,
                                          "High, very unfair" = 3,
                                          "High, extremely unfair" = 4),
                      d_top_inc_unfair = ifelse(top_inc_unfair > 0, 1, 0),
                      bttm_inc_unfair = recode(btminfr, 
                                           "Low, extremely unfair" = 4,
                                           "Low, very unfair" = 3,
                                           "Low, somewhat unfair" = 2,
                                           "Low, slightly unfair" = 1,
                                           "Fair" = 0,	
                                           "High, slightly unfair" = -1,	
                                           "High, somewhat unfair" = -2,
                                           "High, very unfair" = -3,
                                           "High, extremely unfair" = -4),
                      d_bttm_inc_unfair = ifelse(bttm_inc_unfair > 0, 1, 0),
                      job_self_fair = as.numeric(recode(as.character(ifrjob),
                               "Does not apply at all" = "0", 
                               "Applies completely" = "10")),
                      job_self_unfair = -(job_self_fair - 10),
                      d_job_self_unfair = ifelse(job_self_unfair>5, 1, 0),
                      job_other_fair = as.numeric(recode(as.character(evfrjob),
                               "Does not apply at all" = "0", 
                               "Applies completely" = "10")),
                      job_other_unfair = -(job_other_fair - 10),
                      d_job_other_unfair = ifelse(job_other_unfair>5, 1, 0)
                      ) 

# Recode variables about immigration and redistribution attitudes
dat <- dat %>% mutate(
               imm_culture = as.numeric(recode(as.character(imueclt),
                                  "Cultural life undermined" = "0", 
                                  "Cultural life enriched" = "10")),
               redist = recode(gincdif, "Agree strongly" = 5,
                               "Agree" = 4,
                               "Neither agree nor disagree" = 3,
                               "Disagree" = 2,
                               "Disagree strongly" = 1)
               )

# Recode trust variables
dat$trstprl <- as.character(dat$trstprl)
dat$trstplt <- as.character(dat$trstplt)
dat$stfdem <- as.character(dat$stfdem)
unique(dat$trstprl)
dat <- dat %>% mutate(trust_prl = recode(trstprl,
                                         "No trust at all" = 0,
                                         "1" = 1,
                                         "2" = 2,
                                         "3" = 3,
                                         "4" = 4,
                                         "5" = 5,
                                         "6" = 6,
                                         "7" = 7,
                                         "8" = 8,
                                         "9" = 9,
                                         "Complete trust" = 10),
                      trust_plt = recode(trstplt,
                                         "No trust at all" = 0,
                                         "1" = 1,
                                         "2" = 2,
                                         "3" = 3,
                                         "4" = 4,
                                         "5" = 5,
                                         "6" = 6,
                                         "7" = 7,
                                         "8" = 8,
                                         "9" = 9,
                                         "Complete trust" = 10),
                      trust_pols = (trust_plt + trust_prl)/2,
                      sat_democracy = recode(stfdem,
                                             "Extremely dissatisfied" = 0,
                                             "1" = 1,
                                             "2" = 2,
                                             "3" = 3,
                                             "4" = 4,
                                             "5" = 5,
                                             "6" = 6,
                                             "7" = 7,
                                             "8" = 8,
                                             "9" = 9,
                                             "Extremely satisfied" = 10)
                      )

# Recode absolute deprivation and authoritarian value
dat <- dat %>% mutate(abs_deprivation = recode(hincfel,
                                               "Living comfortably on present income" = 1,
                                               "Coping on present income" = 2,
                                               "Difficult on present income" = 3,
                                               "Very difficult on present income" = 4),
                      authoritarian = recode(ipstrgv,
                                             "Very much like me" = 6,
                                             "Like me" = 5,
                                             "Somewhat like me" = 4,
                                             "A little like me" = 3,
                                             "Not like me" = 2,
                                             "Not like me at all" = 1))
       
## Recode control variables

# Education
education <- read.csv("ESS_education_level.csv")

dat <- dat %>%
  mutate(edulvlb = as.character(edulvlb)) %>%
  left_join(education, by = c("edulvlb" = "edu_name")) %>%
  mutate(edu_level = as.numeric(substr(edu_level, 1, 1)),
         college =  ifelse(edu_level>=6,1,0))

# income, female, age, union, unemployed, citizen, religiosity, urban, turnout
dat <- dat %>% mutate(female = ifelse(gndr=="Female", 1, 0),
                      age = as.numeric(as.character(agea)),
                      region = as.character(region),
                      income = recode(hinctnta, 
                                      "J - 1st decile" = 1L,
                                      "R - 2nd decile" = 2L, 
                                      "C - 3rd decile" = 3L,
                                      "M - 4th decile" = 4L,
                                      "F - 5th decile" = 5L,
                                      "S - 6th decile" = 6L,
                                      "K - 7th decile" = 7L,
                                      "P - 8th decile" = 8L,
                                      "D - 9th decile" = 9L,
                                      "H - 10th decile" = 10L),
                      union = ifelse(mbtru %in% c("Yes, currently"), 1, 0),
                      citizen = ifelse( ctzcntr == "Yes", 1, 0),
                      religion = as.numeric(recode(as.character(rlgdgr),
                                                    "Not at all religious" = "0", 
                                                    "Very religious" = "10")),
                      urban = ifelse(domicil %in% c("A big city",
                                                    "Suburbs or outskirts of big city"), 1, 0),
                      unemployed = ifelse(mnactic %in% c("Unemployed, looking for job",
                                                         "Unemployed, not looking for job"), 1, 0),
                      voted = recode(vote, "Yes" = 1 , "No" = 0))


## Append RTI score 
# numeric ISCO 08 code
occupation <- read.csv("ESS_isco08.csv")

# Correct errors in occupation names
dat$isco08[dat$isco08=="Commissioned armed forces officers_duplicated_110"] <- "Commissioned armed forces officers"
dat$isco08[dat$isco08=="Non-commissioned armed forces officers_duplicated_210"] <- "Non-commissioned armed forces officers"
dat$isco08[dat$isco08=="Armed forces occupations, other ranks_duplicated_310"] <- "Armed forces occupations, other ranks"
dat$isco08[dat$isco08=="Police inspectors and detectives_duplicated_3411"] <- "Police inspectors and detectives"
dat$isco08[dat$isco08=="Agricultural, forestry and fishery labourers_duplicated_9210"] <- "Agricultural, forestry and fishery labourers"
dat$isco08[dat$isco08=="Protective services workers_duplicated_5410"] <- "Protective services workers"
dat$isco08[dat$isco08=="Assemblers_duplicated_8210"] <- "Assemblers"

# Append numeric isco codes
dat$isco08 <- as.character(dat$isco08)
dat <- dat %>% left_join(occupation, by = c("isco08" = "occ_name"))
dat$isco08_n <- str_pad(dat$isco08_n,4,"left","0")

# Convert isco08 to isco88, and make them two digits
dat$isco88_n <-  sapply(dat$isco08_n, function(x) if (is.na(x)==1) {NA} else {isco08toisco88(x)} )
dat$isco88_2d <- substr(dat$isco88_n, 1, 2)

# Append RTI scores from Goos, Manning, and Salomons (2014) 
rti_gsm <- read.csv("GMS_rti.csv")
rti_gsm$isco88 <- as.character(rti_gsm$isco88)
dat <- left_join(dat, rti_gsm, by = c("isco88_2d" = "isco88"))

## Party variables
# Columns of party vote for each country
prtv <- grep("prtv",colnames(dat))

# Change into character vectors
dat <- dat %>% mutate_at(prtv, as.character)

col_fun <- function(df, Cols){
  df2 <- df %>%
    mutate(party = coalesce(!!!as.list(df[, Cols])))
  return(df2)
}

# Create an integrated column of party vote
dat <- col_fun(df = dat, Cols = prtv)

# Party Family from PopuList and Manifesto Project
partylist <-  read.csv("party_list.csv") 
dat$cntry <- as.character(dat$cntry)
dat <- dat %>% left_join(partylist, by = c("cntry", "party"))

##################################
# Regional Variables
##################################
nuts <- read.csv("ESS_nuts.csv")

# Merge the NUTS codes
dat <- dat %>% left_join(nuts, by = c("region" = "nuts_name"))

# Change NUTS3 to NUTS2 for Finland and Ireland
dat$nuts2 <- dat$nuts_code
dat$nuts2[dat$cntry %in% c("Finland", "Ireland")] <- substr(dat$nuts2[dat$cntry %in% c("Finland", "Ireland")],1,4)

# Append regional variables from Eurostat
region_vars <- read.csv("regional_variables.csv")

dat <- dat %>% left_join(region_vars, by = c("nuts2" = "GEO"))

# Nuts 13 -> 03 crosswalk
nuts03_13 <- read.csv("nuts_change.csv")

dat <- dat %>% left_join(nuts03_13, by = "nuts2")

# Append net changes in manufacturing, non-manufacturing jobs computed from ERM
erm <- read.csv("erm_cleaned.csv")
dat <- dat %>% left_join(erm, by = c("code_2003" = "geo"))

## Compute changes in manufacturing and non-manufacturing jobs per employee
dat$netjobreduction_manu_scaled <- dat$netjobreduction_manu/dat$employment
dat$netjobreduction_non_manu_scaled <- dat$netjobreduction_non_manu/dat$employment


# Select variables used in analysis
dat <- dat %>% dplyr::select(inc_unfair, d_inc_unfair, top_inc_unfair, d_top_inc_unfair,
                             bttm_inc_unfair, d_bttm_inc_unfair, 
                             job_self_unfair, d_job_self_unfair, job_other_unfair, d_job_other_unfair,
                             imm_culture, redist,
                             populist, far_right, far_left, has_seats, 
                             center_left, center_right,
                             trust_plt, trust_prl, trust_pols, sat_democracy,
                             abs_deprivation, authoritarian,
                             college, female, age, income, union, 
                             religion, urban, unemployed, citizen, voted,
                             RTI, netjobreduction_manu_scaled, netjobreduction_non_manu_scaled,
                             college_share, male_share, unemp_rate, imm_share, 
                             cntry, code_2003, dweight)

# Save
ess_western <- dat %>% filter(! cntry %in% c("Poland", "Hungary"))
ess_eastern <- dat %>% filter(  cntry %in% c("Poland", "Hungary"))

write.csv(ess_western, "ESS_cleaned.csv", row.names = FALSE)
write.csv(ess_eastern, "ESS_cleaned_eastern.csv", row.names = FALSE)


